#Overview

We previously loaded the raw sequencing data, performed quality control, and excluded poor quality cells and low frequency genes to create a filtered dataset.

We will now cluster the cells and visualize ciliary gene expression by cell type and disease state.

Block 1: Load libraries and set variables

library(Seurat)
library(tidyverse)
# Store subdirectory paths as variables
myWorkingDirectory = getwd()
dataDirectory <- file.path(myWorkingDirectory,"data")
figureOutputDirectory = file.path(myWorkingDirectory,"figures")
resultsOutputDirectory = file.path(myWorkingDirectory,"results")
my_theme <- theme_classic(base_size=10) +
  theme(plot.title = element_text(size = rel(1.5), hjust = 0.5),
        legend.title = element_text(size=rel(1.25)),
        axis.title = element_text(size = rel(1.25)),
        text = element_text(face="bold"),
        plot.margin=unit(c(0.25,0.25,0.25,0.25), "cm"))

Block 2: Load the filtered Seurat Object that we previously made

filtered_seurat <- readRDS(file.path(dataDirectory,"merged_filtered_seurat.RDS"))

Block 3: Normalization and scaling

filtered_seurat <- NormalizeData(filtered_seurat, assay = "RNA", normalization.method = "LogNormalize", scale.factor = 10,000)

filtered_seurat <- FindVariableFeatures(filtered_seurat, selection.method = "vst", nfeatures = 5000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(filtered_seurat), 10)

# plot variable features with and without labels
VariableFeaturePlot(filtered_seurat)

all.genes <- rownames(filtered_seurat)
filtered_seurat <- ScaleData(filtered_seurat, features = all.genes)
## Centering and scaling data matrix
# Visualize
# Run PCA
filtered_seurat <- RunPCA(object = filtered_seurat)
## PC_ 1 
## Positive:  RHOC, GSTP1, TMSB4X, S100A11, PPIC, IFITM3, TAGLN2, LITAF, TACSTD2, TSPAN15 
##     CDC42EP1, KRT8, PRSS8, CD24, TMBIM1, PPAP2C, LAD1, LGALS3, ELF3, ANXA4 
##     TPM1, ANXA2, RBPMS, CTSH, BACE2, SERPING1, MYL12A, ANXA2P2, ATP1A1, CTSD 
## Negative:  RAB3B, NEUROD1, GPX3, SLC30A8, SLC7A8, SLC22A17, CNTN1, KIAA1244, SLC7A2, DPP6 
##     ENPP2, MIR7-3HG, F10, PEMT, PARM1, ABCC8, MAFB, CHGA, SSTR2, RFX6 
##     TMOD1, DNAJC12, GAD2, KCNK16, UCHL1, SEZ6L, SLC38A4, PPP1R1A, SCGB2A1, WNT4 
## PC_ 2 
## Positive:  SPARC, COL1A2, PDGFRB, COL3A1, NID1, COL15A1, COL4A1, COL1A1, RCN3, COL6A3 
##     BGN, COL4A2, PXDN, CDH11, IGFBP4, FMOD, PRKCDBP, TPM2, FKBP10, COL5A1 
##     PRRX1, F2R, HEYL, CALD1, MRC2, MFGE8, CTHRC1, LRP1, CALU, LRRC32 
## Negative:  CLDN7, SPINT1, CD24, ELF3, ANXA4, SLC9A3R1, ATP1A1, KRT8, SLC44A4, SERPINA5 
##     LAD1, PRSS8, GGT1, GJB1, HNF1B, HDHD3, PPAP2C, TMC4, ATP1B1, LGALS4 
##     CFTR, ST14, TSPAN15, CLDN4, CLDN3, LCN2, TACSTD2, PTPRF, MUC20, GOLM1 
## PC_ 3 
## Positive:  REG1A, PRSS1, REG1B, CTRB2, CPA2, CTRB1, SPINK1, RARRES2, PRSS3P2, PNLIPRP2 
##     PNLIP, PNLIPRP1, BCAT1, DPEP1, CTRC, SLC39A5, PRSS3, FAM129A, PDIA2, CELA2A 
##     CELA3A, PLA2G1B, TPST2, ANPEP, KLK1, CBS, GATA4, IGFBP2, FGL1, IL32 
## Negative:  CFTR, SCD5, ANXA2, ANXA2P2, CMTM7, DCDC2, VTCN1, CADM1, PON2, KRT19 
##     PPAP2C, SPP1, GSN, CLDN10, PROM1, SERPING1, IGFBP7, ERICH5, NRP1, FXYD2.1 
##     TNFRSF21, WWTR1, CTNNB1, ARL6IP1, TSPAN8, ANXA3, SLC1A1, HSD17B2, SYT13, SORL1 
## PC_ 4 
## Positive:  SLC22A17, F10, CRYBA2, GPX3, CD99L2, VGF, FXYD5, FXYD3, PAPPA2, TP53I13 
##     SMOC1, GPR119, GC, SMIM24, CHGA, FEV, IRX2, CD82, LOXL4, COTL1 
##     VSTM2L, FADS2, QSOX1, SLC7A2, KIAA1244, NEURL1, FKBP11, MAFB, CKB, CCDC86 
## Negative:  HADH, PCSK1, PFKFB2, INS, ADCYAP1, SYT13, CYYR1, NPTX2, SCD5, HERPUD1 
##     ENTPD3, TGFBR3, WSCD2, IAPP, UCHL1, CADM1, TRAM1, GPM6A, LDHB, PKIB 
##     DNAJB9, BMP5, PPP1CB, CASR, SORL1, C1orf127, PIPOX, ETNK1, BROX, IFNGR1 
## PC_ 5 
## Positive:  RGCC, ERG, S1PR1, PCAT19, PECAM1, PODXL, CDH5, ELTD1, ESAM, FLT1 
##     KDR, CLEC14A, PNP, TMEM173, MYCT1, MMRN2, GMFG, ACVRL1, GPR116, CALCRL 
##     PLVAP, CXCR4, IFI27, INSR, PCDH12, FAM101B, GPR4, GNG11, CD93, PPAP2B 
## Negative:  HADH, INS, SCD5, SYT13, UCHL1, PDX1, GSN, COL1A2, NPTX2, PCSK1 
##     PDGFRB, COL1A1, SURF4, WSCD2, COL3A1, C1orf127, ADCYAP1, TGFBR3, CASR, GAD2 
##     COL6A3, ADAMTS2, TMEM37, LRP1, SAMD11, PCOLCE, MXRA8, IAPP, PFKFB2, PRRX1
# Plot PCA
PCAPlot(filtered_seurat,
        split.by = "Disease",ncol=2) + NoLegend()

# Look at load on PC's
print(filtered_seurat[["pca"]],dims = 1:5, nfeatures = 10)
## PC_ 1 
## Positive:  RHOC, GSTP1, TMSB4X, S100A11, PPIC, IFITM3, TAGLN2, LITAF, TACSTD2, TSPAN15 
## Negative:  RAB3B, NEUROD1, GPX3, SLC30A8, SLC7A8, SLC22A17, CNTN1, KIAA1244, SLC7A2, DPP6 
## PC_ 2 
## Positive:  SPARC, COL1A2, PDGFRB, COL3A1, NID1, COL15A1, COL4A1, COL1A1, RCN3, COL6A3 
## Negative:  CLDN7, SPINT1, CD24, ELF3, ANXA4, SLC9A3R1, ATP1A1, KRT8, SLC44A4, SERPINA5 
## PC_ 3 
## Positive:  REG1A, PRSS1, REG1B, CTRB2, CPA2, CTRB1, SPINK1, RARRES2, PRSS3P2, PNLIPRP2 
## Negative:  CFTR, SCD5, ANXA2, ANXA2P2, CMTM7, DCDC2, VTCN1, CADM1, PON2, KRT19 
## PC_ 4 
## Positive:  SLC22A17, F10, CRYBA2, GPX3, CD99L2, VGF, FXYD5, FXYD3, PAPPA2, TP53I13 
## Negative:  HADH, PCSK1, PFKFB2, INS, ADCYAP1, SYT13, CYYR1, NPTX2, SCD5, HERPUD1 
## PC_ 5 
## Positive:  RGCC, ERG, S1PR1, PCAT19, PECAM1, PODXL, CDH5, ELTD1, ESAM, FLT1 
## Negative:  HADH, INS, SCD5, SYT13, UCHL1, PDX1, GSN, COL1A2, NPTX2, PCSK1
ElbowPlot(filtered_seurat)

DimHeatmap(filtered_seurat, dims=1:6, cells=500, balanced = TRUE)

DimHeatmap(filtered_seurat, dims=7:12, cells=500, balanced = TRUE)

DimHeatmap(filtered_seurat, dims=13:15, cells=500, balanced = TRUE)

The load is fading out by PC 11 or 122.

Graph-based clustering

For datasets of 3,000 - 5,000 cells, the resolution set between 0.4-1.4 generally yields good clustering.

The FindClusters() function allows us to enter a series of resolutions and will calculate the “granularity” of the clustering. This is very helpful for testing which resolution works for moving forward without having to run the function for each resolution.

numPC <- 30

# first, rerun UMAP using the selected number of dimensions
filtered_seurat <- RunUMAP(filtered_seurat, dims = 1:numPC, verbose = FALSE)

# Determine the K-nearest neighbor graph
filtered_seurat <- FindNeighbors(object = filtered_seurat,dims = 1:numPC)
## Computing nearest neighbor graph
## Computing SNN
# Determine the clusters for various resolutions. Could go 0.2 to 2   
filtered_seurat <- FindClusters(object = filtered_seurat,
                 resolution = c(0.4,0.6,0.8,1.0,1.2,1.4,1.6))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9355
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9201
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9048
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8896
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8746
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8605
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2438
## Number of edges: 78402
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8475
## Number of communities: 23
## Elapsed time: 0 seconds

###Plot the Clusters

To choose a resolution to start with, typically pick something in the middle of the range like 0.6 or 0.8.

# Assign identity of clusters.  Note the text can change depending on input. look in metadata for actual name
Idents(object = filtered_seurat) <- filtered_seurat@meta.data$RNA_snn_res.0.4

# Plot the UMAP
DimPlot(filtered_seurat,
        reduction = "umap",
        label = TRUE,
        label.size = 6) + ggtitle("Resolution 0.4")

Idents(object = filtered_seurat) <- filtered_seurat@meta.data$RNA_snn_res.0.6

# Plot the UMAP
DimPlot(filtered_seurat,
        reduction = "umap",
        label = TRUE,
        label.size = 6) + ggtitle("Resolution 0.6")

Idents(object = filtered_seurat) <- filtered_seurat@meta.data$RNA_snn_res.0.8

# Plot the UMAP
DimPlot(filtered_seurat,
        reduction = "umap",
        label = TRUE,
        label.size = 6) + ggtitle("Resolution 0.8")

I like resolution 0.6 the best. All outgroups have their own label, but not overly split.

Idents(object = filtered_seurat) <- filtered_seurat@meta.data$RNA_snn_res.0.6

Block 5: Cluster Identification using known cell type markers

First, ensure that the default assay is RNA, not variable features

DefaultAssay(filtered_seurat) <- "RNA"

Exocrine Acinar markers (PRSS1)

FeaturePlot(filtered_seurat,
             reduction = "umap", 
             features = c("PRSS1"), 
             order = TRUE,
             min.cutoff = 'q10', 
             label = TRUE)

Acinar cells are cluster 6.

Exocrine Ductal markers (KRT19)

FeaturePlot(filtered_seurat,
             reduction = "umap", 
             features = c("KRT19"), 
             order = TRUE,
             min.cutoff = 'q10', 
             label = TRUE)

Ductal cells are clusters 2 and 4.

Pancreatic Stellate Cells (TIMP1, FN1, POSTN, ACTA2)

FeaturePlot(filtered_seurat,
            reduction = "umap",
            features = c("TIMP1","FN1","POSTN","ACTA2"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE)

Stellate cells are cluster 13

Alpha Cells (GCG)

FeaturePlot(filtered_seurat,
            reduction = "umap",
            features = c("GCG"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE)

Alpha cells are 0, 1, 5, 9, 11, 12, and 15.

Beta Cells (INS)

FeaturePlot(filtered_seurat,
            reduction = "umap",
            features = c("INS"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE)

Beta cells are cluster 7, 10, and 14

Delta Cells (SST)

FeaturePlot(filtered_seurat,
            reduction = "umap",
            features = c("SST"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE)

Delta cells are cluster 8.

Gamma Cells (PPY)

FeaturePlot(filtered_seurat,
            reduction = "umap",
            features = c("PPY"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE)

Gamma cells are cluster 3.

Block 7: Relabel Clusters

# Rename all identities
filtered_seurat <- RenameIdents(object = filtered_seurat, 
                               "0" = "Alpha",
                               "1" = "Alpha",
                               "2" = "Ductal",
                               "3" = "Gamma",
                               "4" = "Ductal",
                               "5" = "Alpha",
                               "6" = "Acinar",
                               "7" = "Beta",
                               "8" = "Delta",
                               "9" = "Alpha",
                               "10" = "Beta",
                               "11" = "Alpha",
                               "12" = "Alpha",
                               "13" = "PSC",
                               "14" = "Beta",
                               "15" = "Alpha")

# Stash these cell names
filtered_seurat[["Pancreatic_CellTypes"]] <- Idents(filtered_seurat)

DimPlot(filtered_seurat,cols = DiscretePalette(n=7,palette = "glasbey")) + theme(legend.text = element_text(size=16),legend.key.size = unit(1.5,'lines'))

DotPlot(filtered_seurat,features = c("PRSS1","KRT19","FN1","GCG","INS","SST","PPY")) + RotatedAxis()

Block 8: Split Cells by Disease

# For simplicity, remove the exocrine and PSC cells
seurat.subset <- subset(filtered_seurat,ident = c("Alpha","Beta","Delta","Gamma"))

cellDisease <- paste0(seurat.subset@active.ident,".",seurat.subset@meta.data$Disease)

cellDisease <- factor(cellDisease,levels = 
                      c("Alpha.Control","Alpha.T2D", 
                        "Beta.Control","Beta.T2D",
                        "Delta.Control","Delta.T2D",
                        "Gamma.Control","Gamma.T2D"))

seurat.subset <- SetIdent(seurat.subset,value=cellDisease)

seurat.subset[["Cell_Disease_Ident"]]<-Idents(seurat.subset)

DimPlot(seurat.subset,
        cols = DiscretePalette(n=8, palette = "glasbey")) +
        theme(legend.text = element_text(size=16), 
              legend.key.size = unit(1.5,'lines'))

Block 9: Ciliary Gene expression in alpha and beta cells

First, average the expression of every gene by cell type

# cluster averages
endocrine.cluster.averages <- AverageExpression(seurat.subset,return.seurat = TRUE)
## Centering and scaling data matrix

Figure 4D - heatmap of selected ciliary genes in alpha and beta cells

# selected cilia genes. culled from Cilia Carta and literature review

selectCilia <- c("IFT57", "DNAAF2", "IFT80", "SPAG16", "DYNC2H1", "GALNT11", "IFT52", "DYNC2LI1", "DCTN5", "HIF1A", "PAFAH1B1", "RGN", "DNAH5", "DNAJA1", "BBS2", "ING2", "TMF1", "DNAAF2")

selectCilia <- selectCilia[order(selectCilia,decreasing = FALSE)]

# Subset average expression in  alpha and beta cells, split by disease
alpha.beta.average.only <- subset(endocrine.cluster.averages,ident=c("Alpha.Control","Alpha.T2D","Beta.Control","Beta.T2D"))

# Make a heatmap of ciliary gene expression

DoHeatmap(alpha.beta.average.only,
          features = selectCilia,
          draw.lines = FALSE, size=2)

pdf(file = file.path(figureOutputDirectory,"Fig4D_SelectCiliaGenes_AverageClusterExpression_Heatmap.pdf"),width = 4,height = 6,useDingbats = TRUE)

DoHeatmap(alpha.beta.average.only,
          features = selectCilia,
          draw.lines = FALSE, size=2)
dev.off()
## quartz_off_screen 
##                 2
png(file = file.path(figureOutputDirectory,"Fig4D_SelectCiliaGenes_AverageClusterExpression_Heatmap.png"),width = 4, height = 6, units = "in", res = 300)

DoHeatmap(alpha.beta.average.only,
          features = selectCilia,
          draw.lines = FALSE, size=2)
dev.off()
## quartz_off_screen 
##                 2

Block 10: Save the filtered, clustered seurat object

saveRDS(filtered_seurat,file.path(dataDirectory,"filtered_seurat_clustered.RDS"))

Session Info

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.8        purrr_0.3.4       
##  [5] readr_2.1.2        tidyr_1.2.0        tibble_3.1.6       ggplot2_3.3.5     
##  [9] tidyverse_1.3.1    SeuratObject_4.0.4 Seurat_4.1.0      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0          backports_1.4.1       plyr_1.8.7           
##   [4] igraph_1.3.0          lazyeval_0.2.2        splines_4.1.1        
##   [7] listenv_0.8.0         scattermore_0.8       digest_0.6.29        
##  [10] htmltools_0.5.2       fansi_1.0.3           magrittr_2.0.3       
##  [13] tensor_1.5            cluster_2.1.3         ROCR_1.0-11          
##  [16] tzdb_0.3.0            globals_0.14.0        modelr_0.1.8         
##  [19] matrixStats_0.61.0    spatstat.sparse_2.1-0 colorspace_2.0-3     
##  [22] rvest_1.0.2           ggrepel_0.9.1         haven_2.5.0          
##  [25] xfun_0.30             crayon_1.5.1          jsonlite_1.8.0       
##  [28] spatstat.data_2.2-0   survival_3.3-1        zoo_1.8-10           
##  [31] glue_1.6.2            polyclip_1.10-0       gtable_0.3.0         
##  [34] leiden_0.3.9          future.apply_1.8.1    abind_1.4-5          
##  [37] scales_1.2.0          DBI_1.1.2             spatstat.random_2.2-0
##  [40] miniUI_0.1.1.1        Rcpp_1.0.8.3          viridisLite_0.4.0    
##  [43] xtable_1.8-4          reticulate_1.24       spatstat.core_2.4-2  
##  [46] htmlwidgets_1.5.4     httr_1.4.2            RColorBrewer_1.1-3   
##  [49] ellipsis_0.3.2        ica_1.0-2             pkgconfig_2.0.3      
##  [52] farver_2.1.0          sass_0.4.1            uwot_0.1.11          
##  [55] dbplyr_2.1.1          deldir_1.0-6          utf8_1.2.2           
##  [58] tidyselect_1.1.2      labeling_0.4.2        rlang_1.0.2          
##  [61] reshape2_1.4.4        later_1.3.0           munsell_0.5.0        
##  [64] cellranger_1.1.0      tools_4.1.1           cli_3.2.0            
##  [67] generics_0.1.2        broom_0.8.0           ggridges_0.5.3       
##  [70] evaluate_0.15         fastmap_1.1.0         yaml_2.3.5           
##  [73] goftest_1.2-3         knitr_1.38            fs_1.5.2             
##  [76] fitdistrplus_1.1-8    RANN_2.6.1            pbapply_1.5-0        
##  [79] future_1.24.0         nlme_3.1-157          mime_0.12            
##  [82] xml2_1.3.3            compiler_4.1.1        rstudioapi_0.13      
##  [85] plotly_4.10.0         png_0.1-7             spatstat.utils_2.3-0 
##  [88] reprex_2.0.1          bslib_0.3.1           stringi_1.7.6        
##  [91] highr_0.9             RSpectra_0.16-0       lattice_0.20-45      
##  [94] Matrix_1.4-1          vctrs_0.4.1           pillar_1.7.0         
##  [97] lifecycle_1.0.1       spatstat.geom_2.4-0   lmtest_0.9-40        
## [100] jquerylib_0.1.4       RcppAnnoy_0.0.19      data.table_1.14.2    
## [103] cowplot_1.1.1         irlba_2.3.5           httpuv_1.6.5         
## [106] patchwork_1.1.1       R6_2.5.1              promises_1.2.0.1     
## [109] KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.31.0    
## [112] codetools_0.2-18      MASS_7.3-56           assertthat_0.2.1     
## [115] withr_2.5.0           sctransform_0.3.3     mgcv_1.8-40          
## [118] parallel_4.1.1        hms_1.1.1             grid_4.1.1           
## [121] rpart_4.1.16          rmarkdown_2.13        Rtsne_0.15           
## [124] shiny_1.7.1           lubridate_1.8.0